# Alice theme ----
# Colour palettes (vectors of hex colour strings) shared by the plotting
# helpers in this file.

# Eight-colour qualitative palette; each entry is named in the index
# comments below the palette definitions.
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# Nine-step diverging palettes: element 5 is the light midpoint and
# elements 1 and 9 are the two extremes. The heatmap helpers in this file
# build their colour ramps from elements 1, 5 and 9 of rdbuDiv and cybrDiv.
# NOTE(review): the names suggest ColorBrewer diverging schemes - confirm.
rdbuDiv <- c('#b2182b','#d6604d','#f4a582','#fddbc7','#f7f7f7','#d1e5f0','#92c5de','#4393c3','#2166ac')
cybrDiv <- c('#8c510a','#bf812d','#dfc27d','#f6e8c3','#f5f5f5','#c7eae5','#80cdc1','#35978f','#01665e')
pugrDiv <- c('#762a83','#9970ab','#c2a5cf','#e7d4e8','#f7f7f7','#d9f0d3','#a6dba0','#5aae61','#1b7837')
rdybuDiv <- c('#d73027', '#f46d43', '#fdae61', '#fee090', '#ffffbf', '#e0f3f8', '#abd9e9', '#74add1', '#4575b4')
# Six-colour qualitative palettes in dark, medium and light variants.
cbDark <- c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')
cbMed <- c('#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854','#ffd92f')
cbLight <- c('#b3e2cd','#fdcdac','#cbd5e8','#f4cae4','#e6f5c9','#fff2ae')
# Index of the cbPalette entries:
# gray #999999 cbPalette[1]
# orange #E69F00 cbPalette[2]
# blue #56B4E9 cbPalette[3]
# green #009E73 cbPalette[4]
# yellow #F0E442 cbPalette[5]
# dark blue #0072B2 cbPalette[6]
# dark orange #D55E00 cbPalette[7]
# pink #CC79A7 cbPalette[8]
# Disabled Windows-only font registration, kept for reference:
#windowsFonts("Helvetica" = windowsFont("Helvetica"))

# Shared ggplot2 theme for every plot in this file: plain black Helvetica
# text, black axis ticks, no grid lines, no panel background, no legend
# title or key background, and a solid black panel border.
aliceTheme <- local({
  # Text style shared by the plot title, both axis titles and the axis text.
  plain.black.text <- element_text(family = "Helvetica",
                                   color = "black",
                                   face = "plain")

  theme(
    plot.title = plain.black.text,
    axis.title.x = plain.black.text,
    axis.title.y = plain.black.text,
    axis.text = plain.black.text,
    # Legend text uses the same style but with an explicit size.
    legend.text = element_text(family = "Helvetica",
                               size = 12,
                               color = "black",
                               face = "plain"),
    legend.key = element_blank(),
    legend.title = element_blank(),
    axis.ticks = element_line(color = "black"),
    panel.grid = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(linetype = "solid",
                                color = "black",
                                size = 1,
                                fill = NA)
  )
})

# Functions ----
plot_clusters <- function(phos.data.clustering, title, cluster.method, V) {
  # Draws one peptide-profile line plot per cluster, arranged in a grid of
  # three columns under a common title.
  #
  # Inputs:
  #   phos.data.clustering = data from phosphoproteomics experiments to be
  #     plotted by cluster. Unless cluster.method is "projection_based" it
  #     must already carry a numeric `cluster` column.
  #   title = plot title.
  #   cluster.method = "projection_based" or mclust.
  #   V = cluster assignments (projection based only): either a numeric
  #     (integer or double) vector, or a list whose first element is that
  #     vector.
  # Output: the value of grid.arrange() for the per-cluster plots.

  if (cluster.method == "projection_based") {
    phos.data.clustering <- data.frame(phos.data.clustering)
    if (typeof(V) == "list") {
      clusters <- data.frame(as.list(V[1]))
    } else if (typeof(V) %in% c("integer", "double")) {
      # Double vectors such as c(1, 2, 3) are accepted as well as integers.
      clusters <- data.frame(V)
    } else {
      stop("`V` must be a numeric vector of cluster assignments, or a list ",
           "whose first element holds them (got type '", typeof(V), "')",
           call. = FALSE)
    }
    phos.data.clustering$cluster <- clusters[[1]]
  }

  # Clusters are assumed to be numbered 1..max(cluster).
  num.clusters <- max(phos.data.clustering$cluster)

  # Filter by cluster, then plot each cluster in turn.
  cluster.plots <- vector(mode = "list", length = num.clusters)
  for (i in seq_along(cluster.plots)) {
    phos.data.clustering.thiscluster <- phos.data.clustering %>%
      filter(cluster == i)
    cluster.plots[[i]] <- plot.peptide.profiles.preCR(phos.data.clustering.thiscluster)
  }

  grid.arrange(grobs = cluster.plots, ncol = 3,
               top = textGrob(title, x = 0, hjust = -.1, vjust = 2))
}

plot_clusters_with_heatmap <- function(phos.data.clustering, title, cluster.method, V) {
  # Arranges the peptide-profile line plots of clusters 1-4 in a single row
  # together with the line-plot legend and the L2FC colour legend.
  #
  # Inputs: phos.data.clustering = data from phosphoproteomics experiments
  # to be clustered, title = plot title, cluster.method = projection_based or mclust
  # V[1] = cluster assignments as vector (projection based only)
  #
  # Output: the value of grid.arrange() for the assembled panels.
  #
  # Depends on the global `lineplot.legend`, which
  # plot.peptide.profiles.preCR() assigns with `<<-` on every call; it is
  # therefore only defined after the first loop below has run.
  # NOTE(review): despite the name, no heatmap is drawn - the heatmap calls
  # further down are commented out.
  
  if (cluster.method == "projection_based") {
    phos.data.clustering <- data.frame(phos.data.clustering)
    # Only integer vectors and lists are recognised; for any other type of V
    # `clusters` stays undefined and the assignment below fails.
    if (typeof(V) == 'integer') {
      clusters <- data.frame(V)
    } 
    if (typeof(V) == 'list') {
      clusters <- data.frame(as.list(V[1]))
    } 
    phos.data.clustering$cluster <- clusters[[1]]
    
  }
  
  num.clusters <- max(phos.data.clustering$cluster)
  # Smallest and largest log2 fold change over all eight ratio columns; they
  # set the end points of the main colour legend. No na.rm is used, so an NA
  # in any of these columns makes both values NA.
  min.l2fc <- min(phos.data.clustering %>%
                    select('Abundance.Ratio..log2....PBS..0.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..9.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..30.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..300.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..0.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..9.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..30.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..300.....PBS..0.'))
  max.l2fc <- max(phos.data.clustering %>%
                    select('Abundance.Ratio..log2....PBS..0.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..9.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..30.....PBS..0.',
                           'Abundance.Ratio..log2....PBS..300.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..0.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..9.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..30.....PBS..0.',
                           'Abundance.Ratio..log2....IAA..300.....PBS..0.'))
  
  # Colour legends captured as grobs. The main legend maps min -> rdbuDiv[9],
  # 0 -> rdbuDiv[5], max -> rdbuDiv[1].
  main.legend <- draw(Legend(title = paste0("L2FC in Abundance",
                                            '\n', "(/PBS0)"), 
                             col_fun = colorRamp2(c(min.l2fc, 0, max.l2fc), c(rdbuDiv[9], rdbuDiv[5], rdbuDiv[1])))) %>% grid.grabExpr()
  # anno.legend only ends up in `legends.grob`, which is not used afterwards.
  anno.legend <- draw(Legend(title = "Phenotype Score", 
                             col_fun = colorRamp2(c(-7,0,3), c(cybrDiv[1], cybrDiv[5], cybrDiv[9])))) %>% grid.grabExpr()
  
  
  
  
  #filter by cluster, then in loop, plot:
  cluster.bothplots = vector(mode = "list", length = num.clusters+1)
  # NOTE(review): the loop bound is hard-coded to clusters 1-3 rather than
  # derived from num.clusters.
  for(i in 1:3){
    phos.data.clustering.thiscluster <- phos.data.clustering %>%
      filter(cluster == i)
    cluster.bothplots[[i]] <- plot.peptide.profiles.preCR(phos.data.clustering.thiscluster)
  }
  # for(i in (num.clusters+1):(length(cluster.bothplots)/2 + num.clusters)){
  #   phos.data.clustering.thiscluster <- phos.data.clustering %>%
  #     filter(cluster == i-num.clusters)
  #   cluster.bothplots[[i]] <- plot.peptide.heatmaps(phos.data.clustering.thiscluster, min.l2fc, max.l2fc)
  # }
  # cluster.bothplots[[4]] <- plot.peptide.heatmaps(phos.data.clustering.thiscluster, min.l2fc, max.l2fc)
  # `lineplot.legend` is the global set by plot.peptide.profiles.preCR().
  # `legends.grob` is not referenced again in this function.
  legends.grob <- list(main.legend, anno.legend, lineplot.legend)
  plot.title = title
  
  # Slots 4 and 5 hold the two legends; the plot for cluster 4 goes to slot 6.
  cluster.bothplots[[4]] <- lineplot.legend
  cluster.bothplots[[5]] <- main.legend
  #more clusters
  # Single-iteration loop: plots cluster 4 into slot i+2 = 6.
  for(i in 4:4){
    phos.data.clustering.thiscluster <- phos.data.clustering %>%
      filter(cluster == i)
    cluster.bothplots[[i+2]] <- plot.peptide.profiles.preCR(phos.data.clustering.thiscluster)
  }

  # Layout (left to right): clusters 1, 2, 3, cluster 4 (slot 6), line-plot
  # legend (slot 4), main legend (slot 5). Both rows are identical, so every
  # panel spans the full height.
  lay <- rbind(c(1, 2,3,6,4, 5),
               c(1, 2,3,6,4, 5))
  return(grid.arrange(grobs = cluster.bothplots, layout_matrix = lay, 
                      top = textGrob(plot.title, x = 0, hjust = -.1, vjust = 2)))
  
}

plot.peptide.profiles.preCR <- function (timecourse.dataset) {

  ## Line plot of per-peptide log2 fold-change profiles over the four time
  ## points (0, 9, 30, 300 s) for the PBS and the IAA series, both relative
  ## to PBS 0, with the per-time-point median of each series overlaid.
  ##
  ## Input: timecourse.dataset = data frame with the eight
  ##   'Abundance.Ratio..log2....<PBS|IAA>..<0|9|30|300>.....PBS..0.' columns,
  ##   'unique.id.3.phossite' (one id per row; becomes the line grouping) and
  ##   'Master.Protein.Accessions' (used for the protein count annotation).
  ## Output: the ggplot object with its legend removed.
  ## Side effect: the legend of the plot is assigned to the global variable
  ##   `lineplot.legend`, so that a caller can place a single shared legend.

  all.data.hits <- timecourse.dataset

  ## Making line plots
  ## x positions of the four time points; relabelled 0/9/30/300 on the axis.
  timepoints <- data.frame(timepoint = c(1, 2, 3, 4))

  ## Label columns bound onto the median tables. The column has to be called
  ## `variable` because the median layers inherit `group = variable` and the
  ## `text` aesthetic from the base plot.
  PBSmeantext <- data.frame(variable = rep('PBS_Mean', 4))
  ## Was previously built from the PBS label vector by mistake, which
  ## labelled the IAA median row 'PBS_Mean'.
  IAAmeantext <- data.frame(variable = rep('IAA_Mean', 4))

  ## Long format: one row per (timepoint, peptide id) with the L2FC in `value`.
  explore.proteins.scatter.data.pbs <-
    all.data.hits %>%
    select(
      'Abundance.Ratio..log2....PBS..0.....PBS..0.',
      'Abundance.Ratio..log2....PBS..9.....PBS..0.',
      'Abundance.Ratio..log2....PBS..30.....PBS..0.',
      'Abundance.Ratio..log2....PBS..300.....PBS..0.') %>%
    t %>%
    set_colnames(all.data.hits$unique.id.3.phossite) %>%
    cbind(timepoints) %>%
    reshape2::melt(id.vars = c("timepoint"))

  explore.proteins.scatter.data.iaa <-
    all.data.hits %>%
    select(
      'Abundance.Ratio..log2....IAA..0.....PBS..0.',
      'Abundance.Ratio..log2....IAA..9.....PBS..0.',
      'Abundance.Ratio..log2....IAA..30.....PBS..0.',
      'Abundance.Ratio..log2....IAA..300.....PBS..0.') %>%
    t %>%
    set_colnames(all.data.hits$unique.id.3.phossite) %>%
    cbind(timepoints) %>%
    reshape2::melt(id.vars = c("timepoint"))

  ## Per-time-point medians of each series (thick lines in the plot).
  group.means.iaa <- explore.proteins.scatter.data.iaa %>%
    group_by(timepoint) %>%
    summarize(aggregate.value = median(value)) %>%
    cbind(IAAmeantext)

  group.means.pbs <- explore.proteins.scatter.data.pbs %>%
    group_by(timepoint) %>%
    summarize(aggregate.value = median(value)) %>%
    cbind(PBSmeantext)

  ## Peptide plot
  explore.proteins.scatter <-
    (ggplot(explore.proteins.scatter.data.iaa, aes(x=as.numeric(timepoint), y=value, color="IAA", group = variable,
                                                   text = paste0("Unique ID: ", variable))) +
       geom_line(size = .3, alpha = .3) +
       geom_line(data = explore.proteins.scatter.data.pbs, aes(x=as.character(timepoint), y=value, color='PBS', group = variable),
                 size = .3, alpha = .3) +
       geom_line(data = group.means.pbs, aes(x=as.numeric(timepoint), y=aggregate.value, color='PBS Median'),
                 size = 1) +
       geom_line(data = group.means.iaa, aes(x=as.numeric(timepoint), y=aggregate.value, color='IAA Median'),
                 size = 1) +
       scale_color_manual(values = c("IAA" = 'lightgray',
                                     "PBS" = 'aquamarine3',
                                     "PBS Median" = 'darkgreen',
                                     "IAA Median" = 'darkgray')) +
       scale_x_discrete(breaks=c(1, 2, 3, 4),
                        labels=c("0", "9", "30", "300"),
                        expand = c(0,0)) +
       aliceTheme +
       ## Peptide / protein counts, anchored at the lower-left corner.
       annotate("text", x=-Inf, y=-Inf, label= paste0(nrow(timecourse.dataset), " Peptides", '; ',
                                                      length(unique(timecourse.dataset$Master.Protein.Accessions)), " Proteins"),
                hjust = -.3, vjust = -1) +
       labs(title = "", x = "Time (s)", y ="L2FC in Abundance (/PBS0)") +
       ylim(-3, 5) +
       theme(aspect.ratio = 1))

  ## Deliberate global assignment: plot_clusters_with_heatmap() reads
  ## `lineplot.legend` after calling this function.
  lineplot.legend <<- cowplot::get_legend(explore.proteins.scatter)

  return(explore.proteins.scatter + theme(legend.position="none"))
}

plot.peptide.heatmaps <- function (timecourse.dataset, min.l2fc, max.l2fc) {

  ## Heatmap of the eight log2 fold-change columns (conditions as heatmap
  ## rows, peptides as heatmap columns) with a phenotype annotation and
  ## per-peptide text labels, returned as a captured grid object.
  ##
  ## Input: timecourse.dataset = data frame with 'Master.Protein.Accessions',
  ##   the eight 'Abundance.Ratio..log2....' columns, 'phenotype' and
  ##   'Callout.Name'; min.l2fc / max.l2fc = end points of the colour scale
  ##   (0 is mapped to the middle colour).
  ## Output: result of grid.grabExpr() on the drawn heatmap list.

  ratio.columns <- timecourse.dataset %>%
    select('Master.Protein.Accessions',
           'Abundance.Ratio..log2....PBS..0.....PBS..0.',
           'Abundance.Ratio..log2....PBS..9.....PBS..0.',
           'Abundance.Ratio..log2....PBS..30.....PBS..0.',
           'Abundance.Ratio..log2....PBS..300.....PBS..0.',
           'Abundance.Ratio..log2....IAA..0.....PBS..0.',
           'Abundance.Ratio..log2....IAA..9.....PBS..0.',
           'Abundance.Ratio..log2....IAA..30.....PBS..0.',
           'Abundance.Ratio..log2....IAA..300.....PBS..0.')
  colnames(ratio.columns) <- c('Gene.ID', 'PBS 0', 'PBS 9', 'PBS 30', 'PBS 300',
                               'IAA 0', 'IAA 9', 'IAA 30', 'IAA 300')

  ## Numeric part only; accessions become (de-duplicated) row names.
  ratio.values <- ratio.columns[, 2:9]
  row.names(ratio.values) <- make.unique(as.character(ratio.columns$Gene.ID))
  ratio.matrix <- as.matrix(ratio.values)

  ## Phenotype annotation: -7 -> cybrDiv[1], 0 -> cybrDiv[5], 3 -> cybrDiv[9].
  phenotype.colours <- colorRamp2(c(-7, 0, 3),
                                  c(cybrDiv[1], cybrDiv[5], cybrDiv[9]))
  phenotype.annotation <- HeatmapAnnotation(
    df = select(timecourse.dataset, 'phenotype'),
    which = "row",
    width = unit(1, "cm"),
    col = list(phenotype = phenotype.colours),
    na_col = "red",
    show_annotation_name = FALSE,
    show_legend = FALSE,
    annotation_legend_param = list(title = "Phenotype Score"))

  ## Text labels placed below the heatmap columns.
  callout.labels <- columnAnnotation(
    labels = anno_text(timecourse.dataset$Callout.Name,
                       which = "column",
                       gp = gpar(fontsize = '8',
                                 fontfamily = "Helvetica")))

  ## Main colour scale: min -> rdbuDiv[9], 0 -> rdbuDiv[5], max -> rdbuDiv[1].
  l2fc.colours <- colorRamp2(c(min.l2fc, 0, max.l2fc),
                             c(rdbuDiv[9], rdbuDiv[5], rdbuDiv[1]))
  heatmap.body <- Heatmap(matrix = t(ratio.matrix),
                          name = 'phos.data.heatmap',
                          show_row_dend = FALSE,
                          show_column_dend = FALSE,
                          cluster_columns = FALSE,
                          cluster_rows = FALSE,
                          show_row_names = TRUE,
                          show_column_names = FALSE,
                          show_heatmap_legend = FALSE,
                          height = 1*unit(50, "mm"),
                          width = unit(22, 'cm'),
                          col = l2fc.colours,
                          na_col = "black",
                          bottom_annotation = callout.labels)

  ## The pipe into grid.grabExpr() is kept exactly as in the original form.
  return( draw(heatmap.body + phenotype.annotation,
               auto_adjust = FALSE) %>% grid.grabExpr() )
}

# Keeps the elements of x that lie strictly inside
# (mean(x) - 1.5 * IQR(x), mean(x) + 1.5 * IQR(x)); mean and IQR are computed
# with missing values removed. NA elements of x come back as NA because the
# logical index is NA for them.
trim <- function(x) {
  centre <- mean(x, na.rm = TRUE)
  half.width <- 1.5 * IQR(x, na.rm = TRUE)
  x[(x > centre - half.width) & (x < centre + half.width)]
}

# Negated `%in%`: TRUE for every element of x that has no match in y.
`%!in%` <- function(x, y) {
  !(x %in% y)
}

compare.replicates <- function(timecourse.dataset, condition1, condition2,
                               title, dotcolor){

  ## Scatter plot of two columns against each other on log10 axes, with a
  ## linear fit line and the r-squared of lm(condition1 ~ condition2).
  ##
  ## Input: timecourse.dataset = data frame; condition1 / condition2 = names
  ##   of the two columns to compare; title = plot title; dotcolor = point
  ##   colour.
  ## Output: list(plot, r.squared).

  ## Copy the two columns to fixed names so the formula and aes() can refer
  ## to them directly.
  timecourse.dataset$condition1 <- timecourse.dataset[[condition1]]
  timecourse.dataset$condition2 <- timecourse.dataset[[condition2]]

  ## The regression is fitted on the untransformed values.
  replicate.fit <- lm(condition1 ~ condition2, data = timecourse.dataset)
  r2value <- summary(replicate.fit)$r.squared
  r2.label <- paste0("r^2=", r2value)

  Rep.compare.plot <-
    ggplot(data = timecourse.dataset, aes(x = condition1, y = condition2)) +
    geom_point(size = 1, color = dotcolor, alpha = .5) +
    aliceTheme +
    labs(title = title, x = "Replicate 1", y = "Replicate 2") +
    geom_smooth(method = 'lm', color = 'black') +
    ## The label sits at a fixed data position (3000, 3000).
    annotate("text", label = r2.label, y = 3000, x = 3000) +
    scale_y_log10() +
    scale_x_log10() +
    theme(legend.position = "none", aspect.ratio = 1)

  list(Rep.compare.plot, r2value)
}


add.back.modifications <- function(orig.df){

  ## Add back Modifications in Master /(all sites) to peptides that have low probabilities
  ## Input: dataframe that does not have modifications for all peptides. Reads
  ##   the columns Modifications..all.possible.sites., Modifications and
  ##   Positions.in.Master.Proteins.
  ## Output: dataframe that does have modifications for all peptides. The
  ##   columns Modifications.in.Master.Proteins..all.Sites. and
  ##   Modifications.in.Master.Proteins are rewritten for every row as
  ##   "<gene id> <modifications with protein-level residue numbers>".
  ##   A dataframe with zero rows is returned unchanged.

  ## Splits a modification string at "; " only where a digit follows, i.e.
  ## between modifications and not between residues inside the brackets.
  modification.separator <- ';[[:blank:]](?=\\d)'
  ## Matches the residue number that directly follows the residue letter.
  residue.number.regex <- '(?<=[[:alpha:]])[[:digit:]]*(?=$|\\()'

  ## Rewrites one modification, e.g. "1xPhospho [S1(50); T4(50)]", so that
  ## its residue numbers count from the start of the protein.
  ##   x                 = one modification string
  ##   peptide.pos.start = offset added to every peptide-level position
  ##   numbered.only     = if TRUE, residues without any digit (ex "S/T") are
  ##                       left untouched; if FALSE every residue is rewritten
  shift.modification.positions <- function(x, peptide.pos.start, numbered.only) {
    ## Locations of each residue in the peptide (ex 1 4 5 19), shifted by the
    ## peptide start (ex 201 204 205 219).
    locations.in.peptide <- as.numeric(
      str_extract_all(x, '[[:digit:]]+(?=\\]|\\(|;)')[[1]]) + peptide.pos.start

    ## List of residues inside the brackets. ex S1(50) T4(50) Y5(0) T19(0)
    listed.possible.residues <- unlist(str_split(as.character(
      str_extract(x, '(?<=\\[).*(?=\\])')), '; '))

    ## Replace each residue's position with position + start.
    ## ex S201(50) T204(50)
    ## Locations are matched to residues by index, so this only works if any
    ## non-numeric residues are listed last.
    for (residue.index in seq_along(listed.possible.residues)) {
      has.number <- isTRUE(grepl("[[:digit:]]", listed.possible.residues[residue.index]))
      if (!numbered.only || has.number) {
        listed.possible.residues[residue.index] <- sub(
          pattern = residue.number.regex,
          replacement = as.character(locations.in.peptide[residue.index]),
          x = listed.possible.residues[residue.index],
          perl = TRUE)
      }
    }

    ## Reassemble count/type and the renumbered residues.
    ## ex "1xPhospho [S201(50); T204(50)]"
    mod.count.and.type <- str_extract(x, '[[:digit:]]*x[[:alpha:]]*')
    semicolon.joined.residues <- paste(listed.possible.residues, collapse = '; ')
    paste0(mod.count.and.type, " [", semicolon.joined.residues, "]")
  }

  ## seq_len() so that a dataframe without rows is simply skipped.
  for (index.peptide in seq_len(nrow(orig.df))) {

    ## First, separate all possible Modifications by "; "
    ## eg: "1xTMTpro [K15]  1xTMTpro [N-Term]  1xPhospho [S1(0.1); S2(49.9); S3(49.9)]"
    listed.possible.Modifications <- unlist(str_split(as.character(
      orig.df$Modifications..all.possible.sites.[index.peptide]), modification.separator))

    lpm.simple <- unlist(str_split(as.character(
      orig.df$Modifications[index.peptide]), modification.separator))

    ## Second, remove the modification if it is TMT-derived.
    listed.possible.Modifications <- listed.possible.Modifications[!grepl("TMTpro", listed.possible.Modifications)]
    lpm.simple <- lpm.simple[!grepl("TMTpro", lpm.simple)]

    ## Extract the peptide's starting placement in the whole protein, minus
    ## one so it can be added to 1-based peptide positions, and the gene id.
    ## ex 200 and TGGT1_123456
    peptide.pos.start <- as.numeric(
      stri_extract_first(orig.df$Positions.in.Master.Proteins[index.peptide], regex = '(?<=\\[)[[:digit:]]*(?=\\-)')) - 1
    peptide.geneid <- str_extract(orig.df$Positions.in.Master.Proteins[index.peptide], 'TGGT1_.*(?=\\s)')

    ## Third, renumber every residue. The all-possible-sites strings rewrite
    ## every residue; the plain Modifications strings skip residues that
    ## carry no number.
    listed.possible.Modifications <- lapply(listed.possible.Modifications,
                                            shift.modification.positions,
                                            peptide.pos.start = peptide.pos.start,
                                            numbered.only = FALSE)
    lpm.simple <- lapply(lpm.simple,
                         shift.modification.positions,
                         peptide.pos.start = peptide.pos.start,
                         numbered.only = TRUE)

    ## Replace original entries ("") with new entries
    orig.df$Modifications.in.Master.Proteins..all.Sites.[index.peptide] <- paste0(
      peptide.geneid, " ", paste(listed.possible.Modifications, collapse = "; "))
    orig.df$Modifications.in.Master.Proteins[index.peptide] <- paste0(
      peptide.geneid, " ", paste(lpm.simple, collapse = "; "))
  }

  return (orig.df)

}

## Builds one id string per row by concatenating No.Phospho, Sequence and
## Master.Protein.Accessions. Rows are processed with apply(), so values are
## first coerced the way as.matrix() coerces a data frame.
get.unique.ids <- function(df.to.add.unique.ids.to) {
  id.columns <- c('No.Phospho', 'Sequence', 'Master.Protein.Accessions')
  paste.row <- function(row.values) {
    paste0(row.values['No.Phospho'],
           row.values['Sequence'],
           row.values['Master.Protein.Accessions'])
  }
  apply(df.to.add.unique.ids.to[, id.columns], 1, paste.row)
}
## Builds one id string per row by concatenating
## Modifications.in.Master.Proteins and Qvality.PEP. Rows are processed with
## apply(), so values are first coerced the way as.matrix() coerces a data
## frame.
get.unique.ids.thioP <- function(df.to.add.unique.ids.to) {
  id.columns <- c('Modifications.in.Master.Proteins', 'Qvality.PEP')
  paste.row <- function(row.values) {
    paste0(row.values['Modifications.in.Master.Proteins'],
           row.values['Qvality.PEP'])
  }
  apply(df.to.add.unique.ids.to[, id.columns], 1, paste.row)
}
## Builds one id string per row by concatenating the IAA 9 / IAA 0 abundance
## ratio (converted back to a number after apply()'s coercion), Sequence and
## Master.Protein.Accessions.
get.unique.ids2 <- function(df.to.add.unique.ids.to) {
  id.columns <- c('Abundance.Ratio...IAA..9.....IAA..0.',
                  'Sequence',
                  'Master.Protein.Accessions')
  paste.row <- function(row.values) {
    ratio <- as.numeric(row.values['Abundance.Ratio...IAA..9.....IAA..0.'])
    paste0(ratio,
           row.values['Sequence'],
           row.values['Master.Protein.Accessions'])
  }
  apply(df.to.add.unique.ids.to[, id.columns], 1, paste.row)
}
## Builds one id string per row by concatenating Sequence and
## Master.Protein.Accessions.
get.unique.ids3 <- function(df.to.add.unique.ids.to) {
  id.columns <- c('Sequence', 'Master.Protein.Accessions')
  paste.row <- function(row.values) {
    paste0(row.values['Sequence'],
           row.values['Master.Protein.Accessions'])
  }
  apply(df.to.add.unique.ids.to[, id.columns], 1, paste.row)
}

## Builds one id string per row by concatenating Modifications, Sequence and
## Modifications.in.Master.Proteins.
get.unique.ids.intrinsic <- function(df.to.add.unique.ids.to) {
  id.columns <- c('Modifications',
                  'Sequence',
                  'Modifications.in.Master.Proteins')
  paste.row <- function(row.values) {
    paste0(row.values['Modifications'],
           row.values['Sequence'],
           row.values['Modifications.in.Master.Proteins'])
  }
  apply(df.to.add.unique.ids.to[, id.columns], 1, paste.row)
}
retrieve.all.possible.site.numbers.from.df <- function (df.to.return.site.numbers) {
  ## Applies retrieve.site.number.all.possible() to an entire dataframe,
  ## one row at a time, and returns whatever apply() collects from it.
  needed.columns <- c('Sequence',
                      'Master.Protein.Accessions',
                      'No.Phospho',
                      'Modifications.in.Master.Proteins',
                      'Positions.in.Master.Proteins',
                      'Modifications.in.Master.Proteins..all.Sites.')

  ## Hands the fields of one row over in the positional order the
  ## per-peptide function expects.
  site.numbers.for.row <- function(row.values) {
    retrieve.site.number.all.possible(
      row.values['Sequence'],
      row.values['Master.Protein.Accessions'],
      row.values['No.Phospho'],
      row.values['Modifications.in.Master.Proteins'],
      row.values['Positions.in.Master.Proteins'],
      row.values['Modifications.in.Master.Proteins..all.Sites.'])
  }

  apply(df.to.return.site.numbers[, needed.columns], 1, site.numbers.for.row)
}

retrieve.site.number.all.possible <- function(sequence, gene.id, num.phos, modifications, positions, all.mods) {

  ## Function to determine location of phos site in master protein (all possible)
  ## input: for a given phosphopeptide, input Peptide Sequence, Master.Protein.Accessions,
  ##  No.Phospho, Modifications.in.Master.Proteins, Positions.in.Master.Proteins,
  ##  Modifications.in.Master.Proteins..all.Sites.
  ##  Only `modifications` and `all.mods` are read; the other arguments are
  ##  accepted for the caller's convenience.
  ##
  ## output: unique site numbers, taken from the first [...] group of
  ##  `modifications`. An entry whose second character is a digit
  ##  (ex "S27", "T25") contributes its own number; any other entry
  ##  (ex "S/T") contributes every number found in the first [...] group of
  ##  `all.mods`.

  bracket.content.regex <- "(?<=\\[).+?(?=\\])"
  site.number.regex <- "\\-*\\d+\\.*\\d*"

  ## string.of.mods: for example, "S/T/Y; S53"
  string.of.mods <- stri_extract_first(modifications,
                                       regex = bracket.content.regex)[[1]]
  list.of.mods <- unlist(strsplit(as.character(string.of.mods), '; ', fixed = TRUE))

  ## Site numbers of all possible modifications; parsed at most once, the
  ## first time an unnumbered entry is met.
  all.site.numbers <- NULL
  all.sites.parsed <- FALSE

  site.numbers <- c()

  ## for each modification:
  for (mod.index in seq_along(list.of.mods)) {
    this.mod <- list.of.mods[mod.index]

    ## Pattern test instead of as.numeric(), which warned on every
    ## non-numeric character.
    second.char.is.digit <- isTRUE(grepl("^[0-9]$", str_sub(this.mod, 2, 2)))

    if (second.char.is.digit) {
      ## form "S27" or "T25", etc: extract this site number.
      site.numbers <- c(site.numbers,
                        as.numeric(str_extract(this.mod, site.number.regex)))
    } else {
      ## form "S/T", etc: use all possible modification indices.
      if (!all.sites.parsed) {
        string.of.all.mods <- stri_extract_first(all.mods,
                                                 regex = bracket.content.regex)[[1]]
        list.of.all.mods <- unlist(strsplit(as.character(string.of.all.mods), '; ', fixed = TRUE))
        all.site.numbers <- as.numeric(str_extract(list.of.all.mods, site.number.regex))
        all.sites.parsed <- TRUE
      }
      site.numbers <- c(site.numbers, all.site.numbers)
    }
  }

  unique(site.numbers)
}

match.nt.and.phos.site <- function(phos.site, gene.id, num.phos, index, unique.id, threshold, master.mods){
  ### Function to add phosphopeptides in thioP that have matches in timecourse to pre-created dataframe `hits`.
  ## input: for a given phosphopeptide in thioP, input phos.site.in.master, Master.Protein.Accessions,
  ##  No.phospho, Index, Unique.ID.3, threshold for match (maximum allowed gap between the
  ##  two peptides' site ranges), Modifications in master proteins.
  ##  `num.phos` is accepted but not used in the body.
  ### The globals `timecourse.data` and `hits` must exist and be properly filled; `hits` is
  ### modified in place with `<<-`.
  
  ## output: the string 'none' if no timecourse row matches the gene id; otherwise a list with
  ##  one entry per gene-id match, each 'hit' (a row was appended to `hits`) or 'not hit'.
  ##  Though, main purpose is to modify `hits`.
  
  ## Obtain a given thioP peptide's min and max phos site indices
  phos.site.thioP.min <- min(phos.site[[1]])
  phos.site.thioP.max <- max(phos.site[[1]])
  
  ## look through timecourse data for Gene ID matches with given thioP peptide
  ## (gene.id is used as a regular expression by grep, so partial matches count)
  if (isTRUE(length(grep(gene.id, timecourse.data$Master.Protein.Accessions)) != 0)) {
    ## if Gene ID matches, add timecourse peptide to a list of possible matches
    ## NOTE(review): the stored `Index` values are used as row positions below, which
    ## presumably requires Index to equal the row number - confirm.
    possible.matches <- timecourse.data$Index[grep(gene.id, 
                                                   timecourse.data$Master.Protein.Accessions)]
    ## look through list of possible matches
    lapply(possible.matches, function(x){ 
      
      ## Obtain a given timecourse peptide's min and max phos site indices
      phos.site.timecourse.min <- min(timecourse.data$phos.site.in.master[[as.numeric(x)]])
      phos.site.timecourse.max <- max(timecourse.data$phos.site.in.master[[as.numeric(x)]])
      
      if (isTRUE( ((as.numeric(phos.site.thioP.min)-threshold) <= phos.site.timecourse.max) & 
                  (phos.site.timecourse.min <= (as.numeric(phos.site.thioP.max)+threshold)) ) ){
        ## if: thioP min index - match threshold <= timecourse max index AND 
        ##  timecourse min index <= thioP max index + match threshold, consider this 
        ##  timecourse peptide to be a match for the given thioP peptide, and add it to `hits`.
        ## Earlier version of the append, kept for reference:
        # hits[nrow(hits)+1,] <<- c(index,
        #                           x,
        #                           gene.id,
        #                           phos.site,
        #                           timecourse.data$phos.site.in.master[x],
        #                           unique.id,
        #                           timecourse.data$Unique.ID[x],
        #                           abs(as.numeric(mean(c(phos.site.thioP.min,
        #                                                 phos.site.thioP.max)))-as.numeric(mean(c(phos.site.timecourse.min,
        #                                                                                          phos.site.timecourse.max)))),
        #                           master.mods,
        #                           timecourse.data$Modifications.in.Master.Proteins..all.Sites.[x])
        ## Appended row, in order: thioP index, timecourse index, gene id, thioP sites,
        ##  timecourse sites, thioP unique id, timecourse unique id, absolute distance
        ##  between the midpoints of the two site ranges, thioP modifications,
        ##  timecourse modifications (all sites).
        hits[nrow(hits)+1,] <<- as.data.frame(cbind(index,
                                                    x,
                                                    gene.id,
                                                    phos.site,
                                                    timecourse.data$phos.site.in.master[x],
                                                    unique.id,
                                                    timecourse.data$Unique.ID[x],
                                                    abs(as.numeric(mean(c(phos.site.thioP.min,
                                                                          phos.site.thioP.max)))-as.numeric(mean(c(phos.site.timecourse.min,
                                                                                                                   phos.site.timecourse.max)))),
                                                    master.mods,
                                                    timecourse.data$Modifications.in.Master.Proteins..all.Sites.[x]))
        return('hit')
      }
      else {return('not hit')}})
  } 
  else {return('none')}
  
}



match.nt.and.phos.site.nonsig <- function(phos.site, gene.id, num.phos, index, unique.id, threshold, master.mods){
  ### Function to add timecourse matches for one thioP phosphopeptide to the
  ###  pre-created dataframe `hits.nonsigTC` (modified outside this function via `<<-`).
  ## input, for a given phosphopeptide in thioP:
  ##  phos.site   - list whose first element holds the peptide's phos site indices
  ##  gene.id     - pattern handed to grep() and searched for in
  ##                timecourse.data$Master.Protein.Accessions
  ##  num.phos    - not used in this function; kept so existing calls keep working
  ##  index       - thioP identifier copied into the new row
  ##  unique.id   - thioP unique ID copied into the new row
  ##  threshold   - tolerance added on both sides of the thioP site range
  ##  master.mods - thioP modifications value copied into the new row
  ##  (the original header names the thioP columns phos.site.in.master,
  ##   Master.Protein.Accessions, No.phospho, Index, Unique.ID.3 and Origin.Known
  ##   as the intended sources of these arguments -- confirm against the caller)
  ### `timecourse.data` and `hits.nonsigTC` must exist and be properly filled.
  ###  Values of timecourse.data$Index are used as positions into the other
  ###  timecourse.data columns.
  
  ## output: 'none' if no timecourse accession matches `gene.id`; otherwise a list
  ##  with one entry per candidate timecourse peptide, 'hit' or 'not hit'.
  ##  Though, main purpose is to modify `hits.nonsigTC`. Each hit appends one row:
  ##  index, timecourse Index, gene.id, phos.site, timecourse phos sites, unique.id,
  ##  timecourse Unique.ID, distance between site-range midpoints, master.mods,
  ##  timecourse modifications.
  
  ## Obtain a given thioP peptide's min and max phos site indices
  phos.site.thioP.min <- min(phos.site[[1]])
  phos.site.thioP.max <- max(phos.site[[1]])
  
  ## look through timecourse data for Gene ID matches with given thioP peptide
  ##  (searched once; the result is reused for the emptiness check and the lookup)
  accession.matches <- grep(gene.id, timecourse.data$Master.Protein.Accessions)
  if (length(accession.matches) == 0) {
    return('none')
  }
  
  ## timecourse peptides sharing the Gene ID are the possible matches
  possible.matches <- timecourse.data$Index[accession.matches]
  
  ## look through list of possible matches
  lapply(possible.matches, function(x){
    
    ## Coerce the Index value once and use the same numeric position for every
    ##  timecourse lookup below. Indexing with the raw value would turn into a
    ##  name lookup (yielding NA/NULL) whenever Index is not stored as a number.
    timecourse.row <- as.numeric(x)
    
    ## Obtain a given timecourse peptide's min and max phos site indices
    timecourse.sites <- timecourse.data$phos.site.in.master[[timecourse.row]]
    phos.site.timecourse.min <- min(timecourse.sites)
    phos.site.timecourse.max <- max(timecourse.sites)
    
    ## match if: thioP min index - match threshold <= timecourse max index AND
    ##  timecourse min index <= thioP max index + match threshold.
    ##  isTRUE() makes an NA comparison count as "no match".
    is.match <- isTRUE( ((as.numeric(phos.site.thioP.min)-threshold) <= phos.site.timecourse.max) &
                          (phos.site.timecourse.min <= (as.numeric(phos.site.thioP.max)+threshold)) )
    if (!is.match) {
      return('not hit')
    }
    
    ## consider this timecourse peptide to be a match for the given thioP peptide,
    ##  and add it to `hits.nonsigTC` as a new last row
    hits.nonsigTC[nrow(hits.nonsigTC)+1,] <<- as.data.frame(cbind(index,
                                                                  x,
                                                                  gene.id,
                                                                  phos.site,
                                                                  timecourse.data$phos.site.in.master[timecourse.row],
                                                                  unique.id,
                                                                  timecourse.data$Unique.ID[timecourse.row],
                                                                  abs(as.numeric(mean(c(phos.site.thioP.min,
                                                                                        phos.site.thioP.max)))-as.numeric(mean(c(phos.site.timecourse.min,
                                                                                                                                 phos.site.timecourse.max)))),
                                                                  master.mods,
                                                                  timecourse.data$Modifications.in.Master.Proteins..all.Sites.[timecourse.row]))
    'hit'
  })
}

plot_2clusters_with_heatmap <- function(phos.data.clustering, title, cluster.method, V) {
  # Arrange, for each cluster, a peptide profile plot and a heatmap, together
  # with three legends, into one grid.arrange() figure.
  #
  # Inputs: phos.data.clustering = data from phosphoproteomics experiments
  #   to be clustered; must contain the eight 'Abundance.Ratio..log2....' columns
  #   selected below. Unless cluster.method is "projection_based" it must also
  #   already contain a `cluster` column.
  # title = plot title
  # cluster.method = "projection_based" or mclust (only "projection_based" is
  #   tested for; any other value leaves the data untouched)
  # V = cluster assignments (projection based only): either a vector with one
  #   entry per row, or a list whose first element V[1] is that vector
  #
  # Output: the value of grid.arrange() for the assembled figure.
  #
  # Uses objects defined outside this function: plot.peptide.profiles.preCR(),
  # plot.peptide.heatmaps(), lineplot.legend, rdbuDiv and cybrDiv.
  # The layout has slots for exactly two clusters (grobs 1-4) plus legends 5-7.
  
  if (cluster.method == "projection_based") {
    phos.data.clustering <- data.frame(phos.data.clustering)
    # Accept any atomic vector, not only integer ones: a double vector such as
    # c(1, 2, 1) used to leave `clusters` undefined.
    if (is.list(V)) {
      clusters <- data.frame(as.list(V[1]))
    } else if (is.atomic(V)) {
      clusters <- data.frame(V)
    } else {
      stop("`V` must be a vector of cluster assignments or a list holding one as its first element",
           call. = FALSE)
    }
    phos.data.clustering$cluster <- clusters[[1]]
    
  }
  
  num.clusters <- max(phos.data.clustering$cluster)
  if (!isTRUE(num.clusters == 2)) {
    # Slots 5-7 and the layout matrix below are hard-coded for two clusters.
    warning("plot_2clusters_with_heatmap() lays out exactly 2 clusters; found ",
            num.clusters, call. = FALSE)
  }
  
  # Shared colour scale limits: select the abundance columns once and take the
  # overall minimum and maximum across all of them.
  l2fc.values <- phos.data.clustering %>%
    select('Abundance.Ratio..log2....PBS..0.....PBS..0.',
           'Abundance.Ratio..log2....PBS..9.....PBS..0.',
           'Abundance.Ratio..log2....PBS..30.....PBS..0.',
           'Abundance.Ratio..log2....PBS..300.....PBS..0.',
           'Abundance.Ratio..log2....IAA..0.....PBS..0.',
           'Abundance.Ratio..log2....IAA..9.....PBS..0.',
           'Abundance.Ratio..log2....IAA..30.....PBS..0.',
           'Abundance.Ratio..log2....IAA..300.....PBS..0.')
  min.l2fc <- min(l2fc.values)
  max.l2fc <- max(l2fc.values)
  
  # Legend for the heatmap colours: min -> rdbuDiv[9], 0 -> rdbuDiv[5], max -> rdbuDiv[1]
  main.legend <- draw(Legend(title = paste0("L2FC in Abundance",
                                            '\n', "(/PBS0)"), 
                             col_fun = colorRamp2(c(min.l2fc, 0, max.l2fc), c(rdbuDiv[9], rdbuDiv[5], rdbuDiv[1])))) %>% grid.grabExpr()
  # Legend for the annotation colours on a fixed -7 / 0 / 3 scale
  anno.legend <- draw(Legend(title = "Phenotype Score", 
                             col_fun = colorRamp2(c(-7,0,3), c(cybrDiv[1], cybrDiv[5], cybrDiv[9])))) %>% grid.grabExpr()
  
  # filter by cluster, then in loop, plot:
  # first num.clusters slots hold the profile plots, the next num.clusters slots
  # hold the heatmaps of the same clusters
  cluster.bothplots <- vector(mode = "list", length = num.clusters*2)
  for (i in seq_len(num.clusters)) {
    phos.data.clustering.thiscluster <- phos.data.clustering %>%
      filter(cluster == i)
    cluster.bothplots[[i]] <- plot.peptide.profiles.preCR(phos.data.clustering.thiscluster)
  }
  for (i in seq_len(num.clusters)) {
    phos.data.clustering.thiscluster <- phos.data.clustering %>%
      filter(cluster == i)
    cluster.bothplots[[num.clusters + i]] <- plot.peptide.heatmaps(phos.data.clustering.thiscluster, min.l2fc, max.l2fc)
  }
  
  # legends occupy grobs 5-7, which the layout places in the right-hand column
  cluster.bothplots[[5]] <- lineplot.legend
  cluster.bothplots[[6]] <- main.legend
  cluster.bothplots[[7]] <- anno.legend
  lay <- rbind(c(1,2,5),
               c(1,2,5),
               c(3,4,6),
               c(3,4,7))
  grid.arrange(grobs = cluster.bothplots, layout_matrix = lay, 
               top = textGrob(title, x = 0, hjust = -.1, vjust = 2))
  
}
